Description of Goals

We aim to use the maldifish approach to determine whether mass spectral data classify according to symbiont/host tissue locations, using both a spatial clustering framework and a discriminant analysis approach. MALDI data were peak-picked using the PeakPicking.R script and processed to reduce dimensions (Data_Processing.R). Additionally, cluster analysis was performed on the high-memory node in Cologne, and the resulting cluster data are in the associated RData files. This script depends on the following RData files: Cardinal_Processed_Data.RData, supervised-clustering-anlaysis.RData, unsupervised-clustering-anlaysis.RData

Set up working space

rm(list=ls())
library(Cardinal)
## Loading required package: BiocGenerics
## Loading required package: parallel
## 
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
## 
##     clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
##     clusterExport, clusterMap, parApply, parCapply, parLapply,
##     parLapplyLB, parRapply, parSapply, parSapplyLB
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, append, as.data.frame, cbind, colnames,
##     do.call, duplicated, eval, evalq, Filter, Find, get, grep,
##     grepl, intersect, is.unsorted, lapply, lengths, Map, mapply,
##     match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
##     Position, rank, rbind, Reduce, rownames, sapply, setdiff,
##     sort, table, tapply, union, unique, unsplit
## Loading required package: Biobase
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
## Loading required package: ProtGenerics
## Welcome to Cardinal (version 1.7.0)
## 
##     To get started, view the introductory vignettes with
##     'browseVignettes("Cardinal")'.
library(VennDiagram)
## Loading required package: grid
## Loading required package: futile.logger
library(ggplot2)
library(plyr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:plyr':
## 
##     arrange, count, desc, failwith, id, mutate, rename, summarise,
##     summarize
## The following objects are masked from 'package:Cardinal':
## 
##     combine, select
## The following object is masked from 'package:Biobase':
## 
##     combine
## The following objects are masked from 'package:BiocGenerics':
## 
##     combine, intersect, setdiff, union
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Project root on the analysis machine; Data/ and Results/ are resolved from it.
dir <- "/home/maggie/Documents/Projects/maldifish/RAnalysis"
# Switch into Data/ and restore the objects stored in the processed-data file.
setwd(file.path(dir, "Data"))
load("Cardinal_Processed_Data.RData")

Add in a cross validation group for classification

# Give every pixel a random fold label between 1 and 10 and store it in the
# pixel metadata as `cvgroup`.
pixelNo <- nrow(pData(maldifishmz))
# Ten folds for now; raise the fold count to see how results change in an overnight run.
randNum <- sample(seq_len(10), size = pixelNo, replace = TRUE)
pData(maldifishmz)$cvgroup <- as.vector(randNum)

FISH BINNING OF METABOLOMES

EXPLORATORY VISUALIZATION

First perform PCA with the Cardinal package

# PCA on the full dataset, keeping the first four components.
pca.mod <- PCA(maldifishmz, ncomp = 4)
summary(pca.mod)
##                                PC1          PC2          PC3          PC4
## Standard deviation     8680.550952 7.057094e+03 5.026366e+03 4.667260e+03
## Proportion of Variance    0.150693 9.959791e-02 5.052505e-02 4.356346e-02
## Cumulative                0.150693 2.502909e-01 3.008159e-01 3.443794e-01
plot(summary(pca.mod))

Visualize PCA analysis

# Ten-step blue-to-red colour ramp used for the PCA score images.
mycols <- gradient.colors(10, start = "Blue", end = "Red")
# First with the components drawn separately ...
image(pca.mod, column = c("PC1", "PC2", "PC3"), superpose = FALSE, col.regions = mycols)

# ... then with the three components superposed.
image(pca.mod, column = c("PC1", "PC2", "PC3"), superpose = TRUE, col.regions = mycols)

We can see clear separation in the data based on principal components 1 and 2. We can also plot these points as “normal” score plots below, colored by FISH annotation.

## Set up data for plotting
# For each FISH class, pull that class's rows out of the PCA score matrix and
# tag them with the label used in the plots (Red -> mox, Green -> sox,
# Tissue -> host, Mixed -> sox&mox). `drop = FALSE` keeps the result a matrix
# even if a class contains a single pixel, so the PC columns survive.
# NOTE(review): assumes the rows of `scores` are in the same pixel order as
# maldifishmz$Class -- confirm against the Cardinal result object.
pca.scores.red <- as.data.frame(pca.mod[[1]]$scores[maldifishmz$Class == "Red", , drop = FALSE])
pca.scores.red$Class <- "mox"

pca.scores.green <- as.data.frame(pca.mod[[1]]$scores[maldifishmz$Class == "Green", , drop = FALSE])
pca.scores.green$Class <- "sox"

pca.scores.tissue <- as.data.frame(pca.mod[[1]]$scores[maldifishmz$Class == "Tissue", , drop = FALSE])
pca.scores.tissue$Class <- "host"

pca.scores.mixed <- as.data.frame(pca.mod[[1]]$scores[maldifishmz$Class == "Mixed", , drop = FALSE])
pca.scores.mixed$Class <- "sox&mox"

# Stack the per-class tables, then thin to every 15th row so the distribution
# of points stays readable in the scatter plot.
pca.scores <- rbind(pca.scores.green, pca.scores.mixed, pca.scores.red, pca.scores.tissue)
pca.scores.reduced <- pca.scores[seq(1, nrow(pca.scores), 15), ]
ggplot(pca.scores.reduced, aes(x = PC1, y = PC2, color = Class)) +
  geom_point(alpha = 0.5) +
  ggtitle("Plotting every 15th Pixel")

# Redraw the symbiont-associated pixels on top of the full scatter so they are
# not buried under the other points. The mixed class carries the label
# 'sox&mox' in pca.scores$Class (not 'mixed'), so that is the value to match.
pca.scores.symbionts <- pca.scores[pca.scores$Class %in% c("mox", "sox", "sox&mox"), ]
ggplot(pca.scores, aes(x = PC1, y = PC2, color = Class)) +
  geom_point(alpha = 0.5) +
  geom_point(data = pca.scores.symbionts, aes(x = PC1, y = PC2, color = Class))

Clearly there is a large variation in the host tissue. Just to confirm, re-run PCA with just host pixels

# Keep only the pixels annotated as Tissue and repeat the PCA on them alone.
maldifishmz.host <- maldifishmz[, maldifishmz$Class == "Tissue"]
pca.mod.2 <- PCA(maldifishmz.host, ncomp = 3)
image(pca.mod.2, column = c("PC1", "PC2", "PC3"), superpose = FALSE, col.regions = mycols)

# Score plot for the host-only PCA. The scores go into their own table so the
# red-class table built earlier is not overwritten, and the plot draws from
# that table rather than from the full-data scores.
pca.scores.host <- as.data.frame(pca.mod.2[[1]]$scores)
ggplot(pca.scores.host, aes(x = PC1, y = PC2)) +
  geom_point(alpha = 0.5)

CLUSTERING ANALYSIS

To confirm the observation that ciliated edge host pixels cluster independently of the tissue pixels in the symbiont region, we can visualize the unsupervised clustering of the complete dataset using the spatially aware k-means clustering method.

Load in clustering data

# Move to Results/, restore the saved unsupervised clustering objects, and
# list what is now in the workspace.
setwd(file.path(dir, "Results"))
load("unsupervised-clustering-anlaysis.RData")
ls()
##  [1] "df"                   "dir"                  "int_matrix"          
##  [4] "maldifishmz"          "maldifishmz.host"     "mycols"              
##  [7] "pca.mod"              "pca.mod.2"            "pca.scores"          
## [10] "pca.scores.green"     "pca.scores.mixed"     "pca.scores.red"      
## [13] "pca.scores.reduced"   "pca.scores.symbionts" "pca.scores.tissue"   
## [16] "peaks_in_data"        "pixelNo"              "randNum"             
## [19] "skma"                 "skmg"

Unsupervised

print('adaptive')
## [1] "adaptive"
summary(skma)
##   r k   method   time Within-Cluster SS Between-Cluster SS     Total SS
## 1 1 3 gaussian 25.299      9.197815e+12       1.495462e+12 1.069328e+13
## 2 1 5 gaussian 28.075      8.756170e+12       1.937107e+12 1.069328e+13
## 3 1 7 gaussian 32.455      8.429798e+12       2.263479e+12 1.069328e+13
## 4 2 3 gaussian 24.984      9.337381e+12       1.355896e+12 1.069328e+13
## 5 2 5 gaussian 28.335      8.954518e+12       1.738759e+12 1.069328e+13
## 6 2 7 gaussian 32.396      8.679523e+12       2.013755e+12 1.069328e+13
# One panel per (r, k) fit, arranged in a 3 x 2 grid with a colour key.
image(skma, key = TRUE, layout = c(3, 2))

print('gaussian')
## [1] "gaussian"
summary(skmg)
##   r k   method   time Within-Cluster SS Between-Cluster SS     Total SS
## 1 1 3 gaussian 25.850      9.197815e+12       1.495462e+12 1.069328e+13
## 2 1 5 gaussian 27.884      8.756427e+12       1.936850e+12 1.069328e+13
## 3 1 7 gaussian 31.448      8.429798e+12       2.263479e+12 1.069328e+13
## 4 2 3 gaussian 25.179      9.337381e+12       1.355896e+12 1.069328e+13
## 5 2 5 gaussian 29.174      8.954518e+12       1.738759e+12 1.069328e+13
## 6 2 7 gaussian 31.902      8.679523e+12       2.013755e+12 1.069328e+13
# Same 3 x 2 grid of (r, k) fits for the second clustering object.
image(skmg, key = TRUE, layout = c(3, 2))

In all cases, the ciliated edge clusters independently from the other tissue portions. Additionally, it looks like the adaptive and gaussian methods perform about the same.

Supervised

In this analysis a supervised method was used to cluster the dataset into groups (n=4) based on our classification calls from the FISH data.

Load in data

# Move to Results/, restore the saved supervised clustering objects, and list
# what is now in the workspace.
setwd(file.path(dir, "Results"))
load("supervised-clustering-anlaysis.RData")
ls()
##  [1] "df"                   "dir"                  "int_matrix"          
##  [4] "maldifishmz"          "maldifishmz.host"     "mycols"              
##  [7] "pca.mod"              "pca.mod.2"            "pca.scores"          
## [10] "pca.scores.green"     "pca.scores.mixed"     "pca.scores.red"      
## [13] "pca.scores.reduced"   "pca.scores.symbionts" "pca.scores.tissue"   
## [16] "peaks_in_data"        "pixelNo"              "randNum"             
## [19] "skma"                 "skmg"                 "ssca.cv"             
## [22] "sscg.cv"

Summarize the supervised clustering analysis for both gaussian and adaptive methods

Gaussian

plot(summary(sscg.cv))

Adaptive

plot(summary(ssca.cv))

There is no difference between the different radii, and the highest accuracy value is at s=5; it is slightly more accurate for the adaptive method.

Plots the calls for the different cross validation groups for the adaptive method

image(ssca.cv, model=list(r=1, s=5))

These are the t-statistics for the different ions that lead to differences between groups

# Fit spatial shrunken centroids on the full dataset with the FISH class as
# the response (r = 3, s = 5, adaptive weights), then plot the per-ion
# t-statistics for that model.
ssc <- spatialShrunkenCentroids(
  maldifishmz,
  y = as.factor(maldifishmz$Class),
  r = 3,
  s = 5,
  method = "adaptive"
)
plot(ssc, mode = "tstatistics", model = list(r = 3, s = 5))

Summarize results here

summary(ssc)
##   r k s   method   time Predicted # of Classes
## 1 3 4 5 adaptive 59.914                      3
##   Mean # of Features per Class
## 1                           90
# Take the top 90 labelled ions from the fit and print them in ascending m/z order.
sigIons <- topLabels(ssc, n = 90)
sigIons <- sigIons[order(sigIons$mz), ]
sigIons
##          mz r k s classes     centers tstatistics     p.values
## 22 459.2494 3 4 5  Tissue   207.86158    7.844322 4.662937e-15
## 65 462.3349 3 4 5  Tissue    35.77109    3.281688 1.034211e-03
## 69 474.2589 3 4 5     Red   423.92051    3.037532 2.404410e-03
## 25 478.3303 3 4 5  Tissue   880.53828    7.584024 3.552714e-14
## 47 479.3339 3 4 5  Tissue    79.85195    4.632786 3.637808e-06
## 1  480.3454 3 4 5  Tissue  1481.00227   17.993608 0.000000e+00
## 6  481.3494 3 4 5  Tissue   196.80591   12.528334 0.000000e+00
## 80 482.3246 3 4 5  Tissue    31.32135    2.495625 1.258408e-02
## 4  482.3613 3 4 5  Tissue   316.02818   13.919077 0.000000e+00
## 66 483.3637 3 4 5  Tissue    25.66951    3.200104 1.376612e-03
## 51 490.2333 3 4 5     Red   248.01484    4.525730 6.237033e-06
## 2  496.3401 3 4 5  Tissue 11295.80337   16.849117 0.000000e+00
## 3  497.3437 3 4 5  Tissue  2399.58636   15.546633 0.000000e+00
## 89 498.3454 3 4 5  Tissue    41.28434    1.870626 6.141626e-02
## 40 498.3474 3 4 5  Tissue    83.87219    5.607212 2.092010e-08
## 10 502.3281 3 4 5  Tissue   506.67680   11.060725 0.000000e+00
## 31 504.3434 3 4 5  Tissue   121.45224    6.131170 8.939853e-10
## 67 508.3415 3 4 5  Tissue    43.72303    3.199340 1.380262e-03
## 35 508.3760 3 4 5  Tissue    41.40970    5.848870 5.051189e-09
## 12 510.3565 3 4 5  Tissue    98.77363   10.707216 0.000000e+00
## 75 516.3069 3 4 5     Red  9712.45095    2.856516 4.310699e-03
## 26 518.3017 3 4 5  Tissue   252.08832    7.523249 5.639933e-14
## 36 518.3224 3 4 5  Tissue  9306.37008    5.836749 5.431669e-09
## 90 520.3291 3 4 5  Tissue    50.55427    1.821545 6.854376e-02
## 21 522.3560 3 4 5  Tissue  2588.51163    7.865679 3.996803e-15
## 19 523.3599 3 4 5  Tissue   375.58364    8.386139 0.000000e+00
## 8  524.3720 3 4 5  Tissue   138.51303   11.285756 0.000000e+00
## 43 530.3226 3 4 5  Tissue   237.92587    5.192522 2.101487e-07
## 84 532.2799 3 4 5     Red  4976.77339    2.151632 3.150064e-02
## 85 532.3012 3 4 5   Green   419.24370    2.075937 3.798435e-02
## 81 532.3757 3 4 5  Tissue    33.34248    2.300437 2.143700e-02
## 83 534.2957 3 4 5  Tissue  4615.54837    2.171540 2.990581e-02
## 42 535.2990 3 4 5  Tissue   646.29543    5.292115 1.225941e-07
## 53 536.3706 3 4 5  Tissue    51.39487    4.395117 1.114661e-05
## 41 536.3728 3 4 5  Tissue    53.92360    5.313401 1.091202e-07
## 33 536.4071 3 4 5  Tissue    61.51614    6.079468 1.234742e-09
## 58 536.4093 3 4 5  Tissue    41.14476    3.608140 3.093933e-04
## 72 540.3070 3 4 5     Red  1179.53929    2.972576 2.975046e-03
## 74 548.2752 3 4 5   Green   189.36344    2.897298 3.791042e-03
## 27 550.3869 3 4 5  Tissue  2062.86791    7.086251 1.438849e-12
## 48 551.3917 3 4 5  Tissue   239.85997    4.621702 3.837435e-06
## 30 553.4356 3 4 5  Tissue   104.96280    6.732384 1.729017e-11
## 77 556.2809 3 4 5     Red   557.62306    2.694857 7.078533e-03
## 52 558.3541 3 4 5  Tissue    44.01010    4.412965 1.026684e-05
## 82 569.4286 3 4 5  Tissue  1604.32223    2.298746 2.153292e-02
## 49 570.3541 3 4 5     Red  2705.91514    4.555423 5.422511e-06
## 29 570.4340 3 4 5  Tissue   349.63355    6.740372 1.636868e-11
## 56 577.2627 3 4 5     Red  4970.27960    3.667316 2.490824e-04
## 88 586.3269 3 4 5     Red  1020.48805    1.902127 5.724391e-02
## 45 613.4197 3 4 5  Tissue   162.54107    5.080484 3.809502e-07
## 62 625.5063 3 4 5  Tissue    40.99535    3.416768 6.353567e-04
## 11 632.3566 3 4 5  Tissue   618.11557   10.773118 0.000000e+00
## 34 633.3590 3 4 5  Tissue    93.68295    5.941529 2.885639e-09
## 71 635.4017 3 4 5  Tissue   741.02815    2.992872 2.768114e-03
## 14 643.5176 3 4 5  Tissue   370.17361    9.137486 0.000000e+00
## 73 644.5223 3 4 5  Tissue    48.44834    2.928105 3.415420e-03
## 57 658.3715 3 4 5  Tissue   168.06113    3.640514 2.729995e-04
## 9  665.4992 3 4 5  Tissue   817.87161   11.187861 0.000000e+00
## 18 666.5035 3 4 5  Tissue   181.76378    8.493339 0.000000e+00
## 13 681.4735 3 4 5  Tissue   407.66598   10.247337 0.000000e+00
## 59 682.4774 3 4 5  Tissue    63.41344    3.600614 3.184807e-04
## 37 686.4033 3 4 5  Tissue   286.27535    5.825897 5.795905e-09
## 79 687.4062 3 4 5  Tissue    47.36445    2.522705 1.165575e-02
## 68 740.5591 3 4 5  Tissue   100.51170    3.056240 2.245217e-03
## 70 742.5761 3 4 5  Tissue    46.88954    3.012100 2.598752e-03
## 15 756.5536 3 4 5  Tissue   658.87753    8.995214 0.000000e+00
## 44 756.5567 3 4 5  Tissue   218.63893    5.136546 2.833046e-07
## 87 757.5560 3 4 5  Tissue    90.36813    1.967580 4.913454e-02
## 23 757.5590 3 4 5  Tissue   193.83776    7.744197 1.021405e-14
## 28 768.3716 3 4 5  Tissue   223.01212    6.915777 4.839240e-12
## 78 768.5898 3 4 5  Tissue    77.07392    2.563905 1.035975e-02
## 60 769.3772 3 4 5  Tissue    54.07894    3.537992 4.043883e-04
## 20 778.5377 3 4 5  Tissue   658.39340    8.336311 0.000000e+00
## 24 779.5380 3 4 5  Tissue   180.56083    7.698116 1.465494e-14
## 32 784.5870 3 4 5  Tissue   750.95118    6.094671 1.123192e-09
## 38 785.5887 3 4 5  Tissue   281.44648    5.725900 1.048367e-08
## 17 794.5099 3 4 5  Tissue   391.76970    8.729114 0.000000e+00
## 64 795.5148 3 4 5  Tissue    73.00089    3.308685 9.395372e-04
## 46 801.5164 3 4 5  Tissue    62.92512    4.805494 1.558492e-06
## 54 804.5519 3 4 5  Tissue   152.71050    4.233637 2.312965e-05
## 16 829.6446 3 4 5  Tissue   292.66759    8.740340 0.000000e+00
## 55 830.6474 3 4 5  Tissue    92.92830    4.211453 2.551992e-05
## 61 857.6752 3 4 5  Tissue   339.10801    3.476290 5.098254e-04
## 86 890.5555 3 4 5  Tissue    27.37432    2.045631 4.080997e-02
## 5  892.5704 3 4 5  Tissue   512.03832   13.778715 0.000000e+00
## 7  893.5742 3 4 5  Tissue   205.71450   11.400863 0.000000e+00
## 63 918.5877 3 4 5  Tissue    70.93409    3.317225 9.113030e-04
## 50 920.6035 3 4 5  Tissue   348.76897    4.539954 5.669974e-06
## 39 921.6056 3 4 5  Tissue   219.29355    5.675392 1.409046e-08
## 76 991.6730 3 4 5  Tissue    22.75901    2.741410 6.124793e-03
##    adj.p.values
## 22 1.685893e-12
## 65 8.961740e-02
## 69 1.895506e-01
## 25 1.128794e-11
## 47 4.828154e-04
## 1  0.000000e+00
## 6  0.000000e+00
## 80 8.512523e-01
## 4  0.000000e+00
## 66 1.157764e-01
## 51 7.431283e-04
## 2  0.000000e+00
## 3  0.000000e+00
## 89 1.000000e+00
## 40 3.655788e-06
## 10 0.000000e+00
## 31 1.994348e-07
## 67 1.157764e-01
## 35 9.807725e-07
## 12 0.000000e+00
## 75 3.182935e-01
## 26 1.739256e-11
## 36 1.035474e-06
## 90 1.000000e+00
## 21 1.496660e-12
## 19 0.000000e+00
## 8  0.000000e+00
## 43 3.389860e-05
## 84 1.000000e+00
## 85 1.000000e+00
## 81 1.000000e+00
## 83 1.000000e+00
## 42 2.008436e-05
## 53 1.270350e-03
## 41 1.816072e-05
## 33 2.642095e-07
## 58 3.031765e-02
## 72 2.276888e-01
## 74 2.819083e-01
## 27 4.190648e-10
## 48 5.029438e-04
## 30 4.316367e-09
## 77 4.981102e-01
## 52 1.182943e-03
## 82 1.000000e+00
## 49 6.611050e-04
## 29 4.185992e-09
## 56 2.560420e-02
## 88 1.000000e+00
## 45 5.788786e-05
## 62 5.742858e-02
## 11 0.000000e+00
## 34 5.708666e-07
## 71 2.134094e-01
## 14 0.000000e+00
## 73 2.594976e-01
## 57 2.752308e-02
## 9  0.000000e+00
## 18 0.000000e+00
## 13 0.000000e+00
## 59 3.063550e-02
## 37 1.085180e-06
## 79 7.935749e-01
## 68 1.824891e-01
## 70 2.022901e-01
## 15 0.000000e+00
## 44 4.433505e-05
## 87 1.000000e+00
## 23 3.569811e-12
## 28 1.333247e-09
## 78 7.193510e-01
## 60 3.819830e-02
## 20 0.000000e+00
## 24 4.956680e-12
## 32 2.453472e-07
## 38 1.928443e-06
## 17 0.000000e+00
## 64 8.209207e-02
## 46 2.150103e-04
## 54 2.579940e-03
## 16 0.000000e+00
## 55 2.816593e-03
## 61 4.772785e-02
## 86 1.000000e+00
## 5  0.000000e+00
## 7  0.000000e+00
## 63 8.029422e-02
## 50 6.833296e-04
## 39 2.547215e-06
## 76 4.398525e-01

Looks like we can accurately predict the mox signals from the tissue signals, but definitely can’t classify out the mixed signals (we do get some significant ions for the sox). It may be worth repeating by taking out the mixed signal and running more CVs, and potentially adding a category for the ciliated edge tissue sections.

DISCRIMINANT ANALYSIS

Next steps: Re-assign class groups by calling pixels from the ciliated edge as their own class, CiliatedEdge (in the clustering analysis this corresponds to cluster group 1), and run OPLS-DA

# Per-pixel cluster labels from the r = 2, k = 7 spatial k-means fit.
clust.groups <- skma$cluster$`r = 2, k = 7`
# Cluster 1 is taken to be the ciliated edge; collect the names of its pixels.
group1 <- clust.groups[which(clust.groups == 1)]
grp1 <- names(group1)

# Work on a copy of the dataset and relabel those pixels as their own class.
msidata <- maldifishmz
pData(msidata)[rownames(pData(msidata)) %in% grp1, "Class"] <- "CiliatedEdge"

# Keep only the Red, Green and Tissue pixels (the relabelled ciliated-edge
# pixels fall out here) and look at the PCA without them.
msidata.nociliatededge <- msidata[, msidata$Class %in% c("Red", "Green", "Tissue")]
pca.nociliated <- PCA(msidata.nociliatededge, ncomp = 3)
plot(summary(pca.nociliated))

image(pca.nociliated, column = c("PC1", "PC2"), superpose = TRUE)

Comparing ciliated edge vs. tissue

# Two-class dataset: ciliated-edge pixels versus the remaining host tissue.
ciliatedEdge <- msidata[, msidata$Class %in% c("CiliatedEdge", "Tissue")]
table(ciliatedEdge$Class)
## 
## CiliatedEdge       Tissue 
##         2197        14914
# Pixel metadata as a plain data frame, with an "x-y" coordinate key per pixel.
ddd <- data.frame(pData(ciliatedEdge))
ddd$xy <- paste(ddd$x, ddd$y, sep = "-")

# Draw 1000 pixels per class without replacement, and give the draw the same
# coordinate key so it can be matched back to the full metadata table.
subset <- pData(ciliatedEdge) %>%
  group_by(Class) %>%
  sample_n(size = 1000, replace = FALSE)
subset <- data.frame(subset)
subset$xy <- paste(subset$x, subset$y, sep = "-")

# Recover the pixel names of the sampled coordinates ...
dnew <- ddd[ddd$xy %in% subset$xy, ]
coordsSubset <- rownames(dnew)

# ... and keep only those pixels in the imaging dataset.
ciliatedEdge.subset <- ciliatedEdge[, rownames(pData(ciliatedEdge)) %in% coordsSubset]
ciliatedEdge.subset
## An object of class "MSImageSet"
## Slot "processingData":
## Processing data
##   Cardinal version: 1.7.0 
##   Files: /opt/extern/bremen/symbiosis/sogin/MALDIFISH/Data/20161206_MPIBremen_Bputeoserpentis_MALDI-FISH8_Sl16_s1_DHB_233x233_3um.imzML
##          /opt/extern/bremen/symbiosis/sogin/MALDIFISH/Data/20161206_MPIBremen_Bputeoserpentis_MALDI-FISH8_Sl16_s1_DHB_233x233_3um.ibd 
##   Normalization: tic 
##   Smoothing:  
##   Baseline reduction:  
##   Spectrum representation:  
##   Peak picking: adaptive 
## 
## Slot "experimentData":
## Experiment data
##   Experimenter name:  
##   Laboratory:  
##   Contact:  
##   Title:  
##   URL:  
##   PMIDs:  
##   No abstract available.
## 
## Slot "imageData":
## An object of class 'MSImageData'
##   iData: 3495 x 2000 matrix (53.3 Mb)
##   mzData: 3495 x 2000 Hashmat (26.3 Mb)
##   peakData: 3495 x 2000 Hashmat (26.3 Mb)
## 
## Slot "pixelData":
## An object of class 'IAnnotatedDataFrame'
##   pixelNames: x = 4, y = 1 x = 111, y = 1 ... x = 219, y = 233
##     (2000 total)
##   varLabels: x y ... cvgroup (5 total)
##   varMetadata: labelType labelDescription
## 
## Slot "featureData":
## An object of class 'AnnotatedDataFrame'
##   featureNames: m/z = 405.059 m/z = 408.009 ... m/z = 1199.171
##     (3495 total)
##   varLabels: mz
##   varMetadata: labelDescription
## 
## Slot "protocolData":
## An object of class 'AnnotatedDataFrame': none
## 
## Slot ".__classVersion__":
##          R    Biobase       iSet  SImageSet MSImageSet 
##    "3.2.4"   "2.30.0"    "0.1.0"    "0.1.0"    "0.7.0"
table(ciliatedEdge.subset$Class)
## 
## CiliatedEdge       Tissue 
##         1000         1000
# Redraw the random 10-way fold labels for the balanced two-class subset.
pixelNo <- nrow(pData(ciliatedEdge.subset))
# Ten folds for now; raise the fold count to see how results change in an overnight run.
randNum <- sample(seq_len(10), size = pixelNo, replace = TRUE)
pData(ciliatedEdge.subset)$cvgroup <- as.vector(randNum)

OPLS

# Cross-validate OPLS for 1 to 12 components on the balanced subset.
# NOTE(review): `cvgroup` is not a workspace variable; presumably cvApply
# resolves it against the pixel metadata column of that name -- confirm.
ciliated.opls.cv <- cvApply(
  ciliatedEdge.subset,
  .y = as.factor(ciliatedEdge.subset$Class),
  .fun = "OPLS",
  ncomp = 1:12,
  keep.Xnew = FALSE,
  .fold = cvgroup
)
plot(summary(ciliated.opls.cv))

Re-run OPLS with the number of components selected from the cross-validation above and visualize

# Number of components used for the final OPLS fit.
comp <- 7
ciliated.opls <- OPLS(
  x = ciliatedEdge.subset,
  y = as.factor(ciliatedEdge.subset$Class),
  ncomp = comp
)
summary(ciliated.opls)
## $`ncomp = 7`
##             CiliatedEdge     Tissue
## Accuracy      0.95750000 0.95750000
## Sensitivity   0.96900000 0.94600000
## Specificity   0.94600000 0.96900000
## FDR           0.05278592 0.03172979
plot(ciliated.opls)

topLabels(ciliated.opls, n=50)
##          mz ncomp       column coefficients    loadings     Oloadings
## 1  496.3401     7 CiliatedEdge 3.239119e-05  0.73613222  0.2399379697
## 2  518.3224     7 CiliatedEdge 1.103051e-05  0.24458378  0.3822193592
## 3  480.3454     7 CiliatedEdge 9.511102e-06  0.20801412  0.1314120541
## 4  497.3437     7 CiliatedEdge 9.462231e-06  0.21253774  0.1005666557
## 5  494.3251     7 CiliatedEdge 7.608535e-06  0.16592838  0.4745530852
## 6  577.2627     7       Tissue 7.487072e-06 -0.16765613  0.5086171706
## 7  534.2957     7 CiliatedEdge 6.998882e-06  0.15637335  0.1870215360
## 8  522.3560     7 CiliatedEdge 6.138194e-06  0.13635246  0.0607716237
## 9  516.3069     7       Tissue 5.563815e-06 -0.13185490  0.3278247546
## 10 546.4886     7       Tissue 4.536568e-06 -0.10211872 -0.4405931083
## 11 550.3869     7 CiliatedEdge 4.519819e-06  0.10160582  0.0372827860
## 12 569.4286     7 CiliatedEdge 4.053478e-06  0.09421348  0.0295034711
## 13 570.3541     7       Tissue 4.010556e-06 -0.08889690 -0.1134202202
## 14 770.5106     7       Tissue 3.569562e-06 -0.07967657 -0.1068790622
## 15 665.4992     7 CiliatedEdge 3.309602e-06  0.06879067  0.0726091983
## 16 754.5348     7       Tissue 3.246675e-06 -0.06844027 -0.1855584296
## 17 823.6794     7       Tissue 3.228638e-06 -0.07525794 -0.1346922135
## 18 632.3566     7 CiliatedEdge 3.175474e-06  0.06784323 -0.0310399584
## 19 495.3286     7 CiliatedEdge 3.129669e-06  0.06734487  0.0514867401
## 20 756.5536     7 CiliatedEdge 3.128505e-06  0.06822424  0.0636790508
## 21 502.3281     7 CiliatedEdge 3.018987e-06  0.06340708  0.0384983302
## 22 482.3613     7 CiliatedEdge 3.007241e-06  0.06625163 -0.0088338343
## 23 643.5176     7 CiliatedEdge 2.992980e-06  0.06479979  0.0276743218
## 24 569.4309     7 CiliatedEdge 2.916489e-06  0.05526297  0.2775774135
## 25 532.2799     7       Tissue 2.878674e-06 -0.06474777  0.1836766007
## 26 732.5543     7       Tissue 2.814010e-06 -0.06078307  0.0799069167
## 27 478.3303     7 CiliatedEdge 2.809902e-06  0.05787972  0.0066386560
## 28 754.5378     7       Tissue 2.710598e-06 -0.06700728  0.0272147063
## 29 635.4017     7 CiliatedEdge 2.530791e-06  0.05588484  0.1025697491
## 30 869.5375     7       Tissue 2.514012e-06 -0.05398143 -0.1140936948
## 31 535.2990     7 CiliatedEdge 2.493738e-06  0.05334045  0.0449067146
## 32 808.5847     7       Tissue 2.323568e-06 -0.05347421 -0.0898550524
## 33 892.5704     7 CiliatedEdge 2.318222e-06  0.04641680 -0.0349947121
## 34 805.6436     7       Tissue 2.292768e-06 -0.05312478  0.0761496745
## 35 681.4735     7 CiliatedEdge 2.257211e-06  0.04576850  0.0065494575
## 36 570.4340     7 CiliatedEdge 2.232608e-06  0.04728351  0.0553108010
## 37 519.3248     7 CiliatedEdge 2.220022e-06  0.04968112  0.0336779326
## 38 879.7426     7       Tissue 2.218522e-06 -0.05173264 -0.0752318014
## 39 548.3717     7       Tissue 2.139588e-06 -0.04445397 -0.2214521762
## 40 481.3494     7 CiliatedEdge 2.102943e-06  0.04557053 -0.0030095318
## 41 544.3375     7 CiliatedEdge 1.990968e-06  0.03776119  0.1237484724
## 42 523.3599     7 CiliatedEdge 1.968860e-06  0.04171723  0.0021593678
## 43 518.3017     7 CiliatedEdge 1.906351e-06  0.03897443  0.0005299637
## 44 839.6536     7       Tissue 1.869182e-06 -0.04179129 -0.0716935980
## 45 519.3268     7 CiliatedEdge 1.763878e-06  0.03544100  0.0221486369
## 46 586.3269     7       Tissue 1.745420e-06 -0.03726299 -0.0452867947
## 47 778.5377     7 CiliatedEdge 1.744804e-06  0.03206470  0.0341373902
## 48 721.0795     7       Tissue 1.707629e-06 -0.03026874  0.1184354413
## 49 524.3720     7 CiliatedEdge 1.621478e-06  0.03531604 -0.0123867351
## 50 595.4452     7 CiliatedEdge 1.620791e-06  0.03207545  0.0370568806
##        weights      Oweights
## 1   0.72298096  0.0308213359
## 2   0.24620433 -0.0068450540
## 3   0.21229064  0.0451899960
## 4   0.21119981 -0.0462485676
## 5   0.16982477  0.2724820005
## 6  -0.16711369  0.2985943475
## 7   0.15621713 -0.0001363059
## 8   0.13700633  0.0155491867
## 9  -0.12418601  0.2636653756
## 10 -0.10125756 -0.3282073752
## 11  0.10088372  0.0335348904
## 12  0.09047484  0.0209166992
## 13 -0.08951680 -0.0377595386
## 14 -0.07967369  0.1209882998
## 15  0.07387130  0.0542384786
## 16 -0.07246676  0.0075499672
## 17 -0.07206416  0.0669468311
## 18  0.07087752  0.0192262743
## 19  0.06985515  0.0506636213
## 20  0.06982916  0.0279621469
## 21  0.06738469  0.0549137055
## 22  0.06712251  0.0409688294
## 23  0.06680420 -0.0297323227
## 24  0.06509690  0.0952966239
## 25 -0.06425286  0.1864612595
## 26 -0.06280954  0.1565439139
## 27  0.06271785  0.0936287283
## 28 -0.06050136  0.0087573820
## 29  0.05648801  0.0293923567
## 30 -0.05611349 -0.0474400014
## 31  0.05566098  0.0255715041
## 32 -0.05186272 -0.0275504364
## 33  0.05174340  0.0274346264
## 34 -0.05117526  0.0674049890
## 35  0.05038163  0.0144389895
## 36  0.04983247  0.0476811468
## 37  0.04955156  0.0577411517
## 38 -0.04951808  0.0491521881
## 39 -0.04775625 -0.0804786883
## 40  0.04693830  0.0142450423
## 41  0.04443900  0.1479899594
## 42  0.04394553 -0.0067532256
## 43  0.04255033 -0.0243703786
## 44 -0.04172070  0.0463306299
## 45  0.03937029  0.0072351396
## 46 -0.03895830 -0.0304521941
## 47  0.03894454  0.0446536620
## 48 -0.03811480  0.0973690559
## 49  0.03619188 -0.0118912863
## 50  0.03617655  0.0423539955
image(ciliated.opls, model=list(ncomp=comp))

# Look the fitted model up by the component count in `comp` instead of a
# hard-coded "ncomp = 7", so the lookup keeps working if `comp` is changed.
scores <- ciliated.opls@resultData[[paste0("ncomp = ", comp)]]
opls.scores <- data.frame(t0 = scores$Oscores, t1 = scores$scores, Class = scores$classes)
# NOTE(review): C1 and t0.C4 are the column names data.frame() derives from
# the two score matrices; presumably the predictive component against the
# fourth orthogonal component -- confirm that C4 is the intended choice.
ggplot(opls.scores, aes(x = C1, y = t0.C4, color = Class)) +
  geom_point() +
  xlab("t[1]") +
  ylab("t[0]")

Conclusions: There are clear differences in the metabolome between the tissue pixels and the ciliated edge pixels. This is intriguing because it suggests that either the host is somehow controlling the symbiont assemblages at the ciliated edge of the tissue through secretion of interesting metabolites (check annotations), or the symbionts are somehow re-modeling the tissue structures inside the host tissue. This could be an interesting comparison of how hosts differ with and without bacteria.

Either way, we will need to keep the ciliated edge pixels out of the complete analysis.

OPLS Analysis with Ciliated Edge Category

# Remove the Mixed pixels, leaving CiliatedEdge, Green, Red and Tissue.
msidata <- msidata[, !(msidata$Class %in% "Mixed")]
table(msidata$Class)
## 
## CiliatedEdge        Green          Red       Tissue 
##         2197          788         3022        14914
# Pixel metadata as a plain data frame, with an "x-y" coordinate key per pixel.
ddd <- data.frame(pData(msidata))
ddd$xy <- paste(ddd$x, ddd$y, sep = "-")

# Draw 700 pixels per class without replacement, and give the draw the same
# coordinate key so it can be matched back to the full metadata table.
subset <- pData(msidata) %>%
  group_by(Class) %>%
  sample_n(size = 700, replace = FALSE)
subset <- data.frame(subset)
subset$xy <- paste(subset$x, subset$y, sep = "-")

# Recover the pixel names of the sampled coordinates ...
dnew <- ddd[ddd$xy %in% subset$xy, ]
coordsSubset <- rownames(dnew)

# ... and keep only those pixels in the imaging dataset.
msidata.subset <- msidata[, rownames(pData(msidata)) %in% coordsSubset]
msidata.subset
## An object of class "MSImageSet"
## Slot "processingData":
## Processing data
##   Cardinal version: 1.7.0 
##   Files: /opt/extern/bremen/symbiosis/sogin/MALDIFISH/Data/20161206_MPIBremen_Bputeoserpentis_MALDI-FISH8_Sl16_s1_DHB_233x233_3um.imzML
##          /opt/extern/bremen/symbiosis/sogin/MALDIFISH/Data/20161206_MPIBremen_Bputeoserpentis_MALDI-FISH8_Sl16_s1_DHB_233x233_3um.ibd 
##   Normalization: tic 
##   Smoothing:  
##   Baseline reduction:  
##   Spectrum representation:  
##   Peak picking: adaptive 
## 
## Slot "experimentData":
## Experiment data
##   Experimenter name:  
##   Laboratory:  
##   Contact:  
##   Title:  
##   URL:  
##   PMIDs:  
##   No abstract available.
## 
## Slot "imageData":
## An object of class 'MSImageData'
##   iData: 3495 x 2800 matrix (74.7 Mb)
##   mzData: 3495 x 2800 Hashmat (34.1 Mb)
##   peakData: 3495 x 2800 Hashmat (34.1 Mb)
## 
## Slot "pixelData":
## An object of class 'IAnnotatedDataFrame'
##   pixelNames: x = 31, y = 1 x = 37, y = 1 ... x = 225, y = 233
##     (2800 total)
##   varLabels: x y ... cvgroup (5 total)
##   varMetadata: labelType labelDescription
## 
## Slot "featureData":
## An object of class 'AnnotatedDataFrame'
##   featureNames: m/z = 405.059 m/z = 408.009 ... m/z = 1199.171
##     (3495 total)
##   varLabels: mz
##   varMetadata: labelDescription
## 
## Slot "protocolData":
## An object of class 'AnnotatedDataFrame': none
## 
## Slot ".__classVersion__":
##          R    Biobase       iSet  SImageSet MSImageSet 
##    "3.2.4"   "2.30.0"    "0.1.0"    "0.1.0"    "0.7.0"
table(msidata.subset$Class)
## 
## CiliatedEdge        Green          Red       Tissue 
##          700          700          700          700
# Redraw the random 10-way fold labels for the balanced four-class subset.
pixelNo <- nrow(pData(msidata.subset))
# Ten folds for now; raise the fold count to see how results change in an overnight run.
randNum <- sample(seq_len(10), size = pixelNo, replace = TRUE)
pData(msidata.subset)$cvgroup <- as.vector(randNum)

PLS cv analysis

# Cross-validate PLS for 1 to 25 components on the balanced four-class subset.
# NOTE(review): `cvgroup` is not a workspace variable; presumably cvApply
# resolves it against the pixel metadata column of that name -- confirm.
pls.cv <- cvApply(
  msidata.subset,
  .y = as.factor(msidata.subset$Class),
  .fun = "PLS",
  ncomp = 1:25,
  keep.Xnew = FALSE,
  .fold = cvgroup
)
plot(summary(pls.cv))

Visualization

# Number of components used for the final PLS fit.
comp <- 20
pls <- PLS(
  x = msidata.subset,
  y = as.factor(msidata.subset$Class),
  ncomp = comp
)
summary(pls)
## $`ncomp = 20`
##             CiliatedEdge     Green       Red    Tissue
## Accuracy      0.97821429 0.7857143 0.7910714 0.7892857
## Sensitivity   0.98000000 0.5728571 0.5985714 0.5371429
## Specificity   0.97761905 0.8566667 0.8552381 0.8733333
## FDR           0.06412005 0.4287749 0.4204703 0.4143302
plot(pls)

topLabels(pls, n=50)
##          mz ncomp       column coefficients      loadings       weights
## 1  870.5432    20       Tissue 4.541147e-05 -0.0627238578  1.712640e-02
## 2  471.0298    20       Tissue 4.004871e-05 -0.0235705217  4.469432e-02
## 3  577.2604    20       Tissue 3.910770e-05 -0.0018499513  1.823777e-02
## 4  532.3012    20        Green 3.825126e-05 -0.0331103456  1.695989e-02
## 5  413.2668    20          Red 3.796696e-05  0.0241464634 -6.773192e-03
## 6  476.3141    20 CiliatedEdge 3.651823e-05 -0.0037021743  1.919038e-02
## 7  621.4612    20        Green 3.558655e-05 -0.0840761925  1.538020e-02
## 8  840.6551    20        Green 3.367640e-05 -0.0218645141 -3.588757e-02
## 9  520.3395    20          Red 3.317180e-05  0.0445773823  2.195467e-02
## 10 595.4452    20          Red 3.283511e-05  0.0330867518  2.187287e-02
## 11 785.5887    20       Tissue 3.250745e-05  0.0130222003  1.006211e-02
## 12 519.4972    20          Red 3.176385e-05 -0.0117457424 -8.408236e-02
## 13 826.6996    20          Red 3.095984e-05  0.0213342186  3.904842e-02
## 14 721.0824    20          Red 3.080321e-05 -0.0206364275 -3.766001e-02
## 15 857.0956    20          Red 3.075795e-05  0.0715318939  1.416225e-02
## 16 768.3716    20 CiliatedEdge 3.001690e-05  0.0164817310  2.597013e-02
## 17 804.5519    20       Tissue 2.894711e-05  0.0004555756  1.465559e-03
## 18 518.3017    20 CiliatedEdge 2.881403e-05  0.0122909214  3.961809e-03
## 19 474.2589    20          Red 2.860703e-05  0.0538503472 -6.388303e-03
## 20 518.3245    20        Green 2.860300e-05 -0.1426045391  1.443283e-02
## 21 547.4731    20        Green 2.825669e-05  0.0527408117  1.274640e-01
## 22 784.5838    20          Red 2.811974e-05  0.0860481999  1.777998e-02
## 23 610.1845    20       Tissue 2.786738e-05  0.0130476844  6.467886e-03
## 24 716.1269    20          Red 2.747382e-05 -0.0051457776 -2.017616e-02
## 25 895.7178    20        Green 2.724556e-05 -0.0375363084  1.168511e-02
## 26 548.2752    20        Green 2.697298e-05 -0.0115935734 -3.168529e-02
## 27 684.3883    20          Red 2.617944e-05  0.0124060039  2.280228e-02
## 28 817.1032    20          Red 2.615651e-05 -0.0016019814 -4.849431e-02
## 29 758.5687    20       Tissue 2.577813e-05  0.0243922987  2.191520e-02
## 30 768.5160    20          Red 2.512371e-05 -0.0020239220  2.459641e-02
## 31 682.0926    20       Tissue 2.488668e-05  0.0109732232  1.646473e-02
## 32 558.4859    20       Tissue 2.485806e-05 -0.0333801506 -7.439765e-02
## 33 758.5718    20       Tissue 2.465786e-05  0.0227073496  1.391735e-02
## 34 611.4037    20          Red 2.456193e-05  0.0085816064  1.829237e-02
## 35 490.2333    20          Red 2.447344e-05  0.0053377393 -8.191218e-05
## 36 535.2990    20       Tissue 2.436977e-05  0.0097259210  3.155073e-02
## 37 809.5847    20        Green 2.399579e-05 -0.0338460025 -3.456956e-02
## 38 923.6206    20       Tissue 2.397596e-05 -0.0164779855 -3.602164e-03
## 39 953.1206    20       Tissue 2.383393e-05  0.0281042667 -7.450008e-04
## 40 586.0689    20       Tissue 2.362250e-05 -0.0151307557 -3.496996e-02
## 41 585.0641    20       Tissue 2.338168e-05 -0.0934221883 -3.391507e-02
## 42 494.3231    20        Green 2.335323e-05 -0.0287731605 -3.577775e-02
## 43 752.5214    20       Tissue 2.329270e-05 -0.0311219037  1.667660e-02
## 44 510.3197    20        Green 2.224570e-05 -0.0132914082  8.012489e-03
## 45 894.5863    20          Red 2.202010e-05  0.0167391101  3.496549e-02
## 46 994.1155    20          Red 2.196977e-05  0.0294718949  3.322391e-02
## 47 738.0573    20          Red 2.152803e-05  0.0259167203  1.932512e-02
## 48 880.7461    20        Green 2.141732e-05  0.0241370558  5.004728e-02
## 49 756.5536    20 CiliatedEdge 2.141574e-05  0.0652108593  1.206379e-02
## 50 607.0465    20       Tissue 2.133901e-05 -0.0193759134  2.695736e-02
# Image the fitted PLS model for the selected number of components.
image(pls, model = list(ncomp = comp))

# Look the fitted model up by the component count stored in `comp` rather
# than a hard-coded `ncomp = 20` name, so the extraction stays in sync with
# the model fitted above if `comp` is changed.
scores <- pls@resultData[[paste0("ncomp = ", comp)]]$scores
classes <- pls@resultData[[paste0("ncomp = ", comp)]]$y
pls.scores <- data.frame(scores, Classes = classes)

# Pairwise score plots, coloured by class.
ggplot(pls.scores, aes(x = C1, y = C2, color = Classes)) + geom_point()

ggplot(pls.scores, aes(x = C1, y = C3, color = Classes)) + geom_point()

ggplot(pls.scores, aes(x = C2, y = C3, color = Classes)) + geom_point()

# NOTE(review): C20 only exists when at least 20 components were fitted;
# adjust this column if `comp` is lowered.
ggplot(pls.scores, aes(x = C1, y = C20, color = Classes)) + geom_point()

No Ciliated Edge Pixels

Three pairwise comparisons:
host vs. mox, host vs. sox, sox vs. mox

PLS

table(msidata.nociliatededge$Class)
## 
##  Green    Red Tissue 
##    788   3022  14914
# Build an "x-y" coordinate key for every pixel so that the rows sampled below
# can be matched back to the original pixel names (presumably because the
# dplyr pipeline does not carry the row names through).
ddd <- data.frame(pData(msidata.nociliatededge))
ddd$xy <- paste(ddd$x, ddd$y, sep = "-")

# Balance the classes: draw 700 pixels per Class without replacement.
# FALSE is spelled out because F is an ordinary, reassignable variable.
subset <- data.frame(
  pData(msidata.nociliatededge) %>%
    group_by(Class) %>%
    sample_n(size = 700, replace = FALSE)
)
subset$xy <- paste(subset$x, subset$y, sep = "-")

# Pixel names whose coordinates were sampled.
dnew <- ddd[ddd$xy %in% subset$xy, ]
coordsSubset <- rownames(dnew)

# Restrict the image set to the sampled pixels and print it.
nce.subset <- msidata.nociliatededge[, rownames(pData(msidata.nociliatededge)) %in% coordsSubset]
nce.subset
## An object of class "MSImageSet"
## Slot "processingData":
## Processing data
##   Cardinal version: 1.7.0 
##   Files: /opt/extern/bremen/symbiosis/sogin/MALDIFISH/Data/20161206_MPIBremen_Bputeoserpentis_MALDI-FISH8_Sl16_s1_DHB_233x233_3um.imzML
##          /opt/extern/bremen/symbiosis/sogin/MALDIFISH/Data/20161206_MPIBremen_Bputeoserpentis_MALDI-FISH8_Sl16_s1_DHB_233x233_3um.ibd 
##   Normalization: tic 
##   Smoothing:  
##   Baseline reduction:  
##   Spectrum representation:  
##   Peak picking: adaptive 
## 
## Slot "experimentData":
## Experiment data
##   Experimenter name:  
##   Laboratory:  
##   Contact:  
##   Title:  
##   URL:  
##   PMIDs:  
##   No abstract available.
## 
## Slot "imageData":
## An object of class 'MSImageData'
##   iData: 3495 x 2100 matrix (56 Mb)
##   mzData: 3495 x 2100 Hashmat (24.2 Mb)
##   peakData: 3495 x 2100 Hashmat (24.2 Mb)
## 
## Slot "pixelData":
## An object of class 'IAnnotatedDataFrame'
##   pixelNames: x = 26, y = 1 x = 31, y = 1 ... x = 225, y = 233
##     (2100 total)
##   varLabels: x y ... cvgroup (5 total)
##   varMetadata: labelType labelDescription
## 
## Slot "featureData":
## An object of class 'AnnotatedDataFrame'
##   featureNames: m/z = 405.059 m/z = 408.009 ... m/z = 1199.171
##     (3495 total)
##   varLabels: mz
##   varMetadata: labelDescription
## 
## Slot "protocolData":
## An object of class 'AnnotatedDataFrame': none
## 
## Slot ".__classVersion__":
##          R    Biobase       iSet  SImageSet MSImageSet 
##    "3.2.4"   "2.30.0"    "0.1.0"    "0.1.0"    "0.7.0"
table(nce.subset$Class)
## 
##  Green    Red Tissue 
##    700    700    700
## Assign numbers for CV groups
# Each pixel gets a random fold label from 1 to 10 (increase the number of
# folds for a longer, overnight run). TRUE is spelled out because T is an
# ordinary, reassignable variable.
pixelNo <- nrow(pData(nce.subset))
randNum <- sample(1:10, size = pixelNo, replace = TRUE)
pData(nce.subset)$cvgroup <- as.vector(randNum)

# Cross-validate PLS models with 1 to 12 components using those folds.
nce.pls.cv <- cvApply(
  nce.subset,
  .y = as.factor(nce.subset$Class),
  .fun = "PLS",
  ncomp = 1:12,
  keep.Xnew = FALSE,
  .fold = cvgroup
)
plot(summary(nce.pls.cv))

# Fit a 3-component PLS model on the balanced Green/Red/Tissue subset
# (component count presumably picked from the cross-validation plot) and
# report its per-class performance.
comp <- 3
nce.pls <- PLS(
  x = nce.subset,
  y = as.factor(nce.subset$Class),
  ncomp = comp
)
summary(nce.pls)
## $`ncomp = 3`
##                 Green        Red    Tissue
## Accuracy    0.6347619 0.65190476 0.6152381
## Sensitivity 0.6957143 0.04571429 0.6114286
## Specificity 0.6042857 0.95500000 0.6171429
## FDR         0.5321806 0.66315789 0.5560166
plot(nce.pls)

# List the 50 top-ranked m/z features of the fitted model.
topLabels(nce.pls, n = 50)
##          mz ncomp column coefficients     loadings      weights
## 1  665.4992     3 Tissue 1.490006e-05 -0.075846374 -0.092410295
## 2  895.7178     3  Green 1.351666e-05  0.028883947  0.076503804
## 3  532.2820     3  Green 1.231486e-05  0.108419037  0.013681086
## 4  839.6536     3  Green 1.216437e-05  0.018969337 -0.001047237
## 5  532.3012     3  Green 1.197690e-05  0.049835419  0.065886862
## 6  494.3251     3  Green 1.137939e-05  0.202689986 -0.234872802
## 7  859.6913     3  Green 1.120205e-05 -0.011779078  0.077638725
## 8  547.4731     3  Green 1.115032e-05  0.035285717  0.048682625
## 9  868.5711     3  Green 1.084311e-05  0.108354338  0.130314280
## 10 823.6794     3  Green 1.077731e-05 -0.043241954 -0.062884416
## 11 562.4833     3  Green 1.011369e-05  0.023559675  0.020769387
## 12 621.4612     3  Green 1.006500e-05 -0.013630506  0.028189995
## 13 510.3197     3  Green 9.777445e-06  0.044244627  0.066130917
## 14 544.3375     3 Tissue 9.453162e-06 -0.162549634 -0.041087405
## 15 480.3454     3 Tissue 9.351501e-06  0.019160301 -0.031163098
## 16 577.2627     3 Tissue 9.060154e-06 -0.411738604 -0.106523988
## 17 548.2752     3  Green 8.979246e-06  0.026212739  0.057256287
## 18 880.7461     3  Green 8.945407e-06  0.003393503  0.028878754
## 19 544.3397     3  Green 8.298925e-06  0.037455948  0.006338039
## 20 567.4141     3  Green 8.232806e-06 -0.068622082 -0.159505054
## 21 572.3699     3 Tissue 8.205961e-06 -0.110845676 -0.079505483
## 22 532.2799     3  Green 8.180554e-06 -0.164321720 -0.163295203
## 23 732.5543     3 Tissue 8.085319e-06 -0.209110308 -0.145004007
## 24 586.3292     3  Green 7.639761e-06  0.017705397  0.036136556
## 25 540.3070     3 Tissue 7.579065e-06 -0.101666881 -0.119383082
## 26 896.7180     3  Green 7.503089e-06  0.012333013  0.037644458
## 27 681.4735     3 Tissue 7.502975e-06 -0.022193216 -0.043572797
## 28 533.2836     3  Green 7.307487e-06  0.001251660  0.023695513
## 29 517.3095     3 Tissue 7.231884e-06 -0.103363662 -0.102026755
## 30 840.6551     3  Green 6.912892e-06  0.004585491  0.031484881
## 31 746.5342     3  Green 6.830426e-06  0.015768128  0.053567311
## 32 569.4309     3 Tissue 6.814505e-06 -0.037894582 -0.065635829
## 33 494.3231     3  Green 6.756069e-06  0.009996435  0.061858609
## 34 502.3281     3 Tissue 6.736477e-06 -0.012214431 -0.048431213
## 35 643.5176     3 Tissue 6.669014e-06 -0.014812609 -0.041597146
## 36 784.5870     3 Tissue 6.551177e-06 -0.038602337 -0.032765018
## 37 782.5685     3 Tissue 6.248723e-06 -0.093052561 -0.044918363
## 38 492.3084     3  Green 6.244946e-06  0.023010874  0.040703186
## 39 518.4945     3 Tissue 6.244399e-06 -0.185855384 -0.117180887
## 40 824.6816     3  Green 6.153319e-06 -0.011079237  0.014865245
## 41 768.5160     3  Green 6.046858e-06  0.013121886  0.046176588
## 42 754.5348     3 Tissue 6.016647e-06 -0.253565024 -0.092915222
## 43 508.3049     3  Green 5.774939e-06  0.025235797  0.040902702
## 44 546.4886     3  Green 5.765558e-06  0.003536713 -0.140004598
## 45 824.5563     3  Green 5.644082e-06 -0.045021148  0.040413775
## 46 733.5571     3  Green 5.547772e-06 -0.022910478  0.038796784
## 47 681.0893     3 Tissue 5.452878e-06  0.332733908  0.180353324
## 48 879.7391     3  Green 5.275354e-06 -0.001024984  0.029758028
## 49 869.5722     3  Green 5.222157e-06  0.015984385  0.073051408
## 50 519.3248     3 Tissue 5.206831e-06 -0.036484134 -0.031957908
# Image the fitted PLS model for the selected number of components.
image(nce.pls, model = list(ncomp = comp))

# Look the fitted model up by the component count stored in `comp` rather
# than a hard-coded `ncomp = 3` name, so the extraction stays in sync with
# the model fitted above if `comp` is changed.
scores <- nce.pls@resultData[[paste0("ncomp = ", comp)]]$scores
classes <- nce.pls@resultData[[paste0("ncomp = ", comp)]]$y
pls.scores <- data.frame(scores, Classes = classes)

# Score plots of the first component against the second and third,
# coloured by class.
ggplot(pls.scores, aes(x = C1, y = C2, color = Classes)) + geom_point()

ggplot(pls.scores, aes(x = C1, y = C3, color = Classes)) + geom_point()

No predictive power for red, green, and tissue signals with PLS

OPLS

Host vs. mox

# Host vs. mox: keep only the Red and Tissue pixels.
hostmox <- msidata.nociliatededge[, msidata.nociliatededge$Class %in% c("Red", "Tissue")]
table(hostmox$Class)
## 
##    Red Tissue 
##   3022  14914
# Build an "x-y" coordinate key for every pixel so that the rows sampled below
# can be matched back to the original pixel names.
ddd <- data.frame(pData(hostmox))
ddd$xy <- paste(ddd$x, ddd$y, sep = "-")

# Balance the classes: draw 700 pixels per Class without replacement.
# FALSE/TRUE are spelled out because F and T are reassignable variables.
subset <- data.frame(
  pData(hostmox) %>%
    group_by(Class) %>%
    sample_n(size = 700, replace = FALSE)
)
subset$xy <- paste(subset$x, subset$y, sep = "-")

# Pixel names whose coordinates were sampled.
dnew <- ddd[ddd$xy %in% subset$xy, ]
coordsSubset <- rownames(dnew)

hostmox.subset <- hostmox[, rownames(pData(hostmox)) %in% coordsSubset]
table(hostmox.subset$Class)
## 
##    Red Tissue 
##    700    700
## Assign numbers for CV groups
# Each pixel gets a random fold label from 1 to 10 (increase the number of
# folds for a longer, overnight run).
pixelNo <- nrow(pData(hostmox.subset))
randNum <- sample(1:10, size = pixelNo, replace = TRUE)
pData(hostmox.subset)$cvgroup <- as.vector(randNum)

Perform OPLS cross-validation to select the best model representation

# Cross-validate OPLS classifiers (Red vs. Tissue) for 1 to 12 components;
# `cvgroup` is passed as the fold variable.
hostmox.opls.cv <- cvApply(
  hostmox.subset,
  .y = as.factor(hostmox.subset$Class),
  .fun = "OPLS",
  ncomp = 1:12,
  keep.Xnew = FALSE,
  .fold = cvgroup
)
# Plot the cross-validation summary to help choose the component count.
plot(summary(hostmox.opls.cv))

Re-run opls on all the data & visualize

# Fit a 3-component OPLS model on the balanced host-vs-mox subset (component
# count presumably picked from the cross-validation plot) and report its
# per-class performance.
comp <- 3
hostmox.opls <- OPLS(
  x = hostmox.subset,
  y = as.factor(hostmox.subset$Class),
  ncomp = comp
)
summary(hostmox.opls)
## $`ncomp = 3`
##                   Red    Tissue
## Accuracy    0.6278571 0.6278571
## Sensitivity 0.6414286 0.6142857
## Specificity 0.6142857 0.6414286
## FDR         0.3755216 0.3685756
plot(hostmox.opls)

# List the 50 top-ranked m/z features of the fitted model.
topLabels(hostmox.opls, n = 50)
##          mz ncomp column coefficients     loadings    Oloadings
## 1  496.3401     3 Tissue 2.135694e-05 -0.804409926  0.041056184
## 2  516.3069     3    Red 1.665970e-05  0.272343094  0.209613463
## 3  497.3437     3 Tissue 1.399554e-05 -0.237158172 -0.001495411
## 4  518.4945     3    Red 1.190377e-05  0.130659058  0.508431213
## 5  823.6794     3    Red 1.155629e-05  0.205469681  0.013551730
## 6  548.3717     3    Red 1.065344e-05  0.099440642  0.140198976
## 7  567.4141     3    Red 1.050522e-05  0.167943850  0.083990148
## 8  681.0893     3 Tissue 9.794991e-06 -0.017585245  0.055703936
## 9  577.2627     3    Red 9.301900e-06  0.278233672 -0.157105921
## 10 532.2799     3    Red 9.000590e-06  0.148727969  0.007948307
## 11 778.5377     3 Tissue 8.836493e-06 -0.092017096 -0.028575930
## 12 544.3375     3 Tissue 8.608449e-06 -0.120949547 -0.092415745
## 13 922.6163     3 Tissue 8.581353e-06 -0.121042201 -0.042511603
## 14 570.3541     3    Red 8.571665e-06  0.148699445 -0.038090837
## 15 754.5378     3 Tissue 8.543638e-06 -0.187619835 -0.239867019
## 16 569.4286     3 Tissue 8.518809e-06 -0.152887015 -0.044520803
## 17 755.5405     3 Tissue 8.362629e-06 -0.109989495 -0.106059005
## 18 879.7426     3    Red 8.028165e-06  0.138673667  0.005336956
## 19 756.5536     3 Tissue 8.006730e-06 -0.114297076 -0.006131633
## 20 892.5704     3 Tissue 7.972949e-06 -0.082652393 -0.007862662
## 21 586.3269     3    Red 7.608917e-06  0.079264121 -0.019305390
## 22 540.3070     3    Red 7.452424e-06  0.089767913  0.004462370
## 23 621.4612     3    Red 7.359374e-06  0.072319916 -0.019063341
## 24 544.3397     3    Red 6.786283e-06  0.089835063  0.049933289
## 25 534.2957     3    Red 6.526617e-06  0.001407485 -0.141480730
## 26 824.5563     3 Tissue 6.467815e-06 -0.072005950 -0.068098439
## 27 632.3566     3 Tissue 6.274317e-06 -0.067904116 -0.010896090
## 28 780.5520     3 Tissue 6.246629e-06 -0.072056395 -0.077086498
## 29 868.5711     3 Tissue 6.235068e-06 -0.154365552 -0.089590609
## 30 474.2589     3    Red 5.968259e-06  0.041077446  0.025028064
## 31 825.6950     3    Red 5.904775e-06  0.056054692 -0.006053856
## 32 784.5870     3 Tissue 5.839103e-06 -0.098044902  0.003731799
## 33 806.5690     3 Tissue 5.752278e-06 -0.084138758 -0.065156686
## 34 808.5815     3 Tissue 5.585980e-06 -0.060154259 -0.008715069
## 35 478.3303     3 Tissue 5.469181e-06 -0.056128942 -0.039408418
## 36 520.3395     3    Red 5.002724e-06  0.010431996  0.015247002
## 37 829.6446     3 Tissue 4.955506e-06 -0.052361037 -0.015886919
## 38 532.2820     3    Red 4.907254e-06  0.089437591  0.042307759
## 39 806.6464     3 Tissue 4.845583e-06 -0.036900960 -0.045802780
## 40 770.5106     3    Red 4.809199e-06  0.086386406 -0.147092032
## 41 532.3012     3    Red 4.624105e-06  0.029986256 -0.012574452
## 42 869.5375     3    Red 4.545042e-06  0.097677091 -0.066348250
## 43 480.3454     3 Tissue 4.537772e-06 -0.135720267  0.009375241
## 44 895.7178     3    Red 4.526636e-06  0.047807858 -0.009315045
## 45 817.1032     3 Tissue 4.525837e-06  0.022910658  0.008408269
## 46 578.2657     3    Red 4.419961e-06  0.074741798 -0.030064171
## 47 542.3228     3    Red 4.383377e-06  0.031406106 -0.004090966
## 48 857.6752     3 Tissue 4.343194e-06 -0.042124457 -0.024858400
## 49 585.0664     3    Red 4.230839e-06  0.020223648 -0.003923420
## 50 569.4309     3    Red 4.081732e-06  0.040126145  0.001309379
##        weights     Oweights
## 1  -0.32230454  0.112486004
## 2   0.25141695  0.133322521
## 3  -0.21121128  0.066346772
## 4   0.17964369  0.363171497
## 5   0.17439985  0.052882160
## 6   0.16077456  0.178727880
## 7   0.15853768  0.049457430
## 8  -0.14781944  0.117258391
## 9   0.14037803 -0.257564255
## 10  0.13583087  0.043382981
## 11 -0.13335443  0.004201804
## 12 -0.12991295 -0.086750916
## 13 -0.12950402 -0.028164107
## 14  0.12935782  0.003222971
## 15 -0.12893486 -0.170723880
## 16 -0.12856016 -0.081405604
## 17 -0.12620319 -0.112708570
## 18  0.12115568  0.027540057
## 19 -0.12083220  0.019576297
## 20 -0.12032239  0.035478488
## 21  0.11482868 -0.051915672
## 22  0.11246698 -0.024628269
## 23  0.11106273 -0.056928478
## 24  0.10241404  0.045966926
## 25  0.09849533 -0.028980189
## 26 -0.09760792 -0.050150336
## 27 -0.09468779  0.030037229
## 28 -0.09426994 -0.053335500
## 29 -0.09409546 -0.068515515
## 30  0.09006896 -0.024880097
## 31  0.08911090 -0.041114925
## 32 -0.08811982  0.020434614
## 33 -0.08680952 -0.066528271
## 34 -0.08429987 -0.019220257
## 35 -0.08253721 -0.013881399
## 36  0.07549776 -0.048894814
## 37 -0.07478517  0.004574602
## 38  0.07405699  0.063186697
## 39 -0.07312629 -0.008582946
## 40  0.07257721 -0.111602717
## 41  0.06978389 -0.050217255
## 42  0.06859073 -0.053247432
## 43 -0.06848100 -0.019517521
## 44  0.06831296 -0.020704497
## 45 -0.06830089  0.042756103
## 46  0.06670308 -0.082120299
## 47  0.06615098 -0.049364161
## 48 -0.06554457 -0.002730931
## 49  0.06384899 -0.049786153
## 50  0.06159877  0.034411144
# Image the fitted OPLS model for the selected number of components.
image(hostmox.opls, model = list(ncomp = comp))

# Look the fitted model up by the component count stored in `comp` rather
# than a hard-coded `ncomp = 3` name, so the extraction stays in sync with
# the model fitted above if `comp` is changed.
scores <- hostmox.opls@resultData[[paste0("ncomp = ", comp)]]
# t0 holds the Oscores entry and t1 the scores entry of the fitted model.
opls.scores <- data.frame(t0 = scores$Oscores, t1 = scores$scores, Class = scores$classes)
# NOTE(review): t0.C3 only exists when at least 3 components were fitted;
# adjust this column if `comp` is lowered.
ggplot(opls.scores, aes(x = C1, y = t0.C3, color = Class)) +
  geom_point() +
  xlab("t[1]") +
  ylab("t[0]")

Still not a significant model (FDR > 0.05)

Host vs. sox

# Host vs. sox: keep only the Green and Tissue pixels.
hostsox <- msidata.nociliatededge[, msidata.nociliatededge$Class %in% c("Green", "Tissue")]
table(hostsox$Class)
## 
##  Green Tissue 
##    788  14914
# Build an "x-y" coordinate key for every pixel so that the rows sampled below
# can be matched back to the original pixel names.
ddd <- data.frame(pData(hostsox))
ddd$xy <- paste(ddd$x, ddd$y, sep = "-")

# Balance the classes: draw 700 pixels per Class without replacement.
# FALSE/TRUE are spelled out because F and T are reassignable variables.
subset <- data.frame(
  pData(hostsox) %>%
    group_by(Class) %>%
    sample_n(size = 700, replace = FALSE)
)
subset$xy <- paste(subset$x, subset$y, sep = "-")

# Pixel names whose coordinates were sampled.
dnew <- ddd[ddd$xy %in% subset$xy, ]
coordsSubset <- rownames(dnew)

hostsox.subset <- hostsox[, rownames(pData(hostsox)) %in% coordsSubset]
table(hostsox.subset$Class)
## 
##  Green Tissue 
##    700    700
## Assign numbers for CV groups
# Each pixel gets a random fold label from 1 to 10 (increase the number of
# folds for a longer, overnight run).
pixelNo <- nrow(pData(hostsox.subset))
randNum <- sample(1:10, size = pixelNo, replace = TRUE)
pData(hostsox.subset)$cvgroup <- as.vector(randNum)

# Cross-validate OPLS models with 1 to 12 components using those folds.
hostsox.subset.cv <- cvApply(
  hostsox.subset,
  .y = as.factor(hostsox.subset$Class),
  .fun = "OPLS",
  ncomp = 1:12,
  keep.Xnew = FALSE,
  .fold = cvgroup
)
plot(summary(hostsox.subset.cv))

# Fit a 1-component OPLS model on the balanced host-vs-sox subset (component
# count presumably picked from the cross-validation plot) and report its
# per-class performance.
comp <- 1
hostsox.opls <- OPLS(
  x = hostsox.subset,
  y = as.factor(hostsox.subset$Class),
  ncomp = comp
)
summary(hostsox.opls)
## $`ncomp = 1`
##                 Green    Tissue
## Accuracy    0.6392857 0.6392857
## Sensitivity 0.6671429 0.6114286
## Specificity 0.6114286 0.6671429
## FDR         0.3680650 0.3524962
plot(hostsox.opls)

# List the 50 top-ranked m/z features of the fitted model.
topLabels(hostsox.opls, n = 50)
##          mz ncomp column coefficients     loadings     Oloadings
## 1  496.3401     1 Tissue 2.190190e-05 -0.834286741 -0.0027699508
## 2  518.3224     1 Tissue 1.413832e-05 -0.661216839  0.2861478194
## 3  516.3069     1  Green 1.195781e-05  0.133363640  0.4655180309
## 4  567.4141     1  Green 1.117102e-05  0.131465451  0.1832301685
## 5  532.2799     1  Green 1.060103e-05  0.161870826  0.2364735798
## 6  823.6794     1  Green 9.136929e-06  0.150889605  0.0845210769
## 7  665.4992     1 Tissue 8.296807e-06 -0.100113648  0.0080616156
## 8  839.6536     1  Green 8.217631e-06  0.124286106  0.0375487631
## 9  621.4612     1  Green 7.498719e-06  0.067450013  0.0481353912
## 10 545.0717     1 Tissue 7.375550e-06  0.001654365 -0.2262826831
## 11 681.0893     1 Tissue 7.329858e-06  0.046608625 -0.4256122007
## 12 497.3437     1 Tissue 6.446534e-06 -0.156581441 -0.0236131110
## 13 879.7426     1  Green 6.406377e-06  0.096615371  0.0533194065
## 14 570.3541     1  Green 6.265407e-06  0.094725062  0.1389407121
## 15 480.3454     1 Tissue 6.202877e-06 -0.093589049 -0.0290737032
## 16 806.5690     1 Tissue 6.167139e-06 -0.071750695  0.0404688060
## 17 532.2820     1  Green 6.018555e-06  0.073693905  0.0197033783
## 18 546.4886     1  Green 5.981118e-06  0.096477860 -0.0241823383
## 19 732.5543     1 Tissue 5.903190e-06 -0.276665868  0.2172158159
## 20 562.4833     1  Green 5.794104e-06  0.084942187  0.0005772117
## 21 569.4286     1 Tissue 5.565716e-06 -0.124675719  0.0805338809
## 22 817.1032     1 Tissue 5.534461e-06  0.046342534 -0.3170511974
## 23 532.3012     1  Green 5.401866e-06  0.044973317  0.0094036217
## 24 892.5704     1 Tissue 5.336413e-06 -0.053820090 -0.0189980687
## 25 681.4735     1 Tissue 5.210564e-06 -0.053369197 -0.0000788427
## 26 494.3251     1  Green 5.168811e-06 -0.050424746  0.0736637457
## 27 869.5409     1  Green 5.068846e-06  0.073550020  0.0248852888
## 28 778.5377     1 Tissue 4.946686e-06 -0.079799000  0.0196178338
## 29 550.3869     1 Tissue 4.447521e-06 -0.124283059  0.0313791889
## 30 585.0641     1 Tissue 4.361758e-06  0.040282256 -0.2519056637
## 31 895.7178     1  Green 4.353105e-06  0.049059295  0.0116479019
## 32 840.6551     1  Green 4.214831e-06  0.038032524  0.0052882461
## 33 518.4945     1 Tissue 3.800185e-06 -0.116155412  0.0089342539
## 34 556.2809     1  Green 3.743592e-06  0.065553143  0.0164440783
## 35 548.2752     1  Green 3.708580e-06  0.023642276  0.0018753713
## 36 533.2836     1  Green 3.688313e-06  0.052553591  0.0253268254
## 37 519.3248     1 Tissue 3.668631e-06 -0.062447281  0.0190011603
## 38 519.4972     1 Tissue 3.577097e-06 -0.043627529 -0.0063154879
## 39 510.3197     1  Green 3.416050e-06  0.010195542 -0.0106584210
## 40 643.5176     1 Tissue 3.330996e-06 -0.042901774 -0.0006004879
## 41 770.5106     1  Green 3.310994e-06  0.010928913  0.1695310979
## 42 534.2957     1 Tissue 3.310172e-06 -0.249623317  0.1266676168
## 43 794.5099     1 Tissue 3.257877e-06 -0.040255974  0.0064384661
## 44 681.0866     1 Tissue 3.209880e-06 -0.012533535 -0.0242881327
## 45 682.0926     1 Tissue 3.180273e-06  0.015624207 -0.1577486628
## 46 540.3070     1  Green 3.140796e-06  0.075542004  0.0394575209
## 47 569.4309     1 Tissue 3.127384e-06 -0.040334670  0.0464190149
## 48 784.5870     1 Tissue 3.106047e-06 -0.070455872  0.0202820852
## 49 880.7461     1  Green 2.996841e-06  0.052540406  0.0175246768
## 50 895.7142     1  Green 2.992181e-06  0.052238775  0.0297534064
##        weights     Oweights
## 1  -0.40920362 -0.252376606
## 2  -0.26415296  0.010747960
## 3   0.22341336  0.301694100
## 4   0.20871342  0.036591817
## 5   0.19806400  0.131892395
## 6   0.17070957  0.004980187
## 7  -0.15501319  0.114667114
## 8   0.15353390 -0.044630070
## 9   0.14010212 -0.069673691
## 10 -0.13780089 -0.047062501
## 11 -0.13694722 -0.205612922
## 12 -0.12044365 -0.014187122
## 13  0.11969338 -0.011004141
## 14  0.11705957  0.076252138
## 15 -0.11589129  0.033166823
## 16 -0.11522358  0.122193212
## 17  0.11244752 -0.056631298
## 18  0.11174807 -0.078389398
## 19 -0.11029210  0.101680736
## 20  0.10825400 -0.059871378
## 21 -0.10398692  0.098464405
## 22 -0.10340297 -0.140551409
## 23  0.10092563 -0.078831219
## 24 -0.09970275  0.059418261
## 25 -0.09735143  0.075725785
## 26  0.09657135 -0.097830454
## 27  0.09470365 -0.028704672
## 28 -0.09242129  0.064443339
## 29 -0.08309515  0.022737908
## 30 -0.08149279 -0.109302424
## 31  0.08133112 -0.047515459
## 32  0.07874770 -0.060816715
## 33 -0.07100065 -0.007729752
## 34  0.06994331 -0.012670134
## 35  0.06928916 -0.065439954
## 36  0.06891051 -0.014566212
## 37 -0.06854277  0.049202984
## 38 -0.06683260  0.039213922
## 39  0.06382368 -0.083453718
## 40 -0.06223458  0.039676410
## 41  0.06186088  0.099949840
## 42 -0.06184551 -0.026140874
## 43 -0.06086846  0.047418753
## 44 -0.05997171  0.041362370
## 45 -0.05941854 -0.066579390
## 46  0.05868098  0.034170545
## 47 -0.05843040  0.084182378
## 48 -0.05803175  0.029470245
## 49  0.05599140 -0.005722886
## 50  0.05590434  0.006337099
# Image the fitted OPLS model for the selected number of components.
image(hostsox.opls, model = list(ncomp = comp))

# Look the fitted model up by the component count stored in `comp` rather
# than a hard-coded `ncomp = 1` name, so the extraction stays in sync with
# the model fitted above if `comp` is changed.
scores <- hostsox.opls@resultData[[paste0("ncomp = ", comp)]]
# t0 holds the Oscores entry and t1 the scores entry of the fitted model.
opls.scores <- data.frame(t0 = scores$Oscores, t1 = scores$scores, Class = scores$classes)
# The score plot is left disabled for this model.
#ggplot(opls.scores, aes(x=C1, y=t0.C1, color=Class)) + geom_point() + xlab('t[1]') + ylab('t[0]')

Not a predictive model with OPLS (no t-orthogonal scores calculated)

sox vs. mox

# Sox vs. mox: keep only the Green and Red pixels.
moxsox <- msidata.nociliatededge[, msidata.nociliatededge$Class %in% c("Green", "Red")]
table(moxsox$Class)
## 
## Green   Red 
##   788  3022
# Build an "x-y" coordinate key for every pixel so that the rows sampled below
# can be matched back to the original pixel names.
ddd <- data.frame(pData(moxsox))
ddd$xy <- paste(ddd$x, ddd$y, sep = "-")

# Balance the classes: draw 700 pixels per Class without replacement.
# FALSE/TRUE are spelled out because F and T are reassignable variables.
subset <- data.frame(
  pData(moxsox) %>%
    group_by(Class) %>%
    sample_n(size = 700, replace = FALSE)
)
subset$xy <- paste(subset$x, subset$y, sep = "-")

# Pixel names whose coordinates were sampled.
dnew <- ddd[ddd$xy %in% subset$xy, ]
coordsSubset <- rownames(dnew)

moxsox.subset <- moxsox[, rownames(pData(moxsox)) %in% coordsSubset]
table(moxsox.subset$Class)
## 
## Green   Red 
##   700   700
## Assign numbers for CV groups
# Each pixel gets a random fold label from 1 to 10 (increase the number of
# folds for a longer, overnight run).
pixelNo <- nrow(pData(moxsox.subset))
randNum <- sample(1:10, size = pixelNo, replace = TRUE)
pData(moxsox.subset)$cvgroup <- as.vector(randNum)

# Cross-validate OPLS models with 1 to 12 components using those folds.
moxsox.subset.cv <- cvApply(
  moxsox.subset,
  .y = as.factor(moxsox.subset$Class),
  .fun = "OPLS",
  ncomp = 1:12,
  keep.Xnew = FALSE,
  .fold = cvgroup
)
plot(summary(moxsox.subset.cv))

# Fit a 1-component OPLS model on the balanced sox-vs-mox subset (component
# count presumably picked from the cross-validation plot) and report its
# per-class performance.
comp <- 1
moxsox.opls <- OPLS(
  x = moxsox.subset,
  y = as.factor(moxsox.subset$Class),
  ncomp = comp
)
summary(moxsox.opls)
## $`ncomp = 1`
##                 Green       Red
## Accuracy    0.6107143 0.6107143
## Sensitivity 0.6428571 0.5785714
## Specificity 0.5785714 0.6428571
## FDR         0.3959732 0.3816794
plot(moxsox.opls)

# List the 50 top-ranked m/z features of the fitted model.
topLabels(moxsox.opls, n = 50)
##          mz ncomp column coefficients      loadings     Oloadings
## 1  577.2627     1    Red 1.898445e-05 -0.9348025781  0.0087454935
## 2  518.3224     1    Red 1.570364e-05 -0.8680461621  0.2767757357
## 3  567.4141     1  Green 1.302543e-05  0.1113652052  0.1058539192
## 4  494.3251     1  Green 1.176194e-05  0.2943051807  0.3665062533
## 5  665.4992     1    Red 1.032269e-05 -0.0852780093  0.0202287295
## 6  681.0893     1    Red 9.539022e-06 -0.0311173249 -0.2942594605
## 7  823.6794     1  Green 9.070144e-06  0.1846168667  0.0486618857
## 8  817.1032     1    Red 8.440879e-06 -0.0074447928 -0.2261395811
## 9  546.4886     1  Green 7.562831e-06  0.1314430627  0.1304949349
## 10 755.5405     1    Red 7.083512e-06 -0.1275691036  0.0397412950
## 11 547.4731     1  Green 6.763211e-06  0.1079966352  0.0063744041
## 12 839.6536     1  Green 6.474881e-06  0.1327343627  0.0103452124
## 13 895.7178     1  Green 6.449360e-06  0.0437062793 -0.0001638785
## 14 569.4309     1    Red 6.425417e-06 -0.1449360817  0.0396003605
## 15 578.2657     1    Red 5.965041e-06 -0.2064077486 -0.0107436804
## 16 534.2957     1    Red 5.574664e-06 -0.3080696727  0.1089671557
## 17 859.6913     1  Green 5.556404e-06  0.0003340791  0.0166859299
## 18 562.4833     1  Green 5.335441e-06  0.1056025894  0.0337771755
## 19 857.0956     1    Red 5.012481e-06  0.0058696119 -0.1436634382
## 20 879.7391     1  Green 4.968246e-06  0.0181590005  0.0103639911
## 21 516.3069     1  Green 4.957690e-06 -0.0986161773  0.3093944335
## 22 733.5571     1  Green 4.729283e-06  0.0461328116  0.0862855592
## 23 585.0641     1    Red 4.684075e-06 -0.0253110804 -0.2425040386
## 24 572.3699     1    Red 4.602843e-06 -0.0627905740  0.0391476694
## 25 532.3012     1  Green 4.573289e-06  0.0018964634  0.0036623242
## 26 545.0717     1    Red 4.532697e-06  0.0149423858 -0.1408059435
## 27 732.5543     1    Red 4.444182e-06 -0.1172554073  0.2960086956
## 28 558.4859     1    Red 4.254870e-06 -0.0265024905  0.0261134963
## 29 824.6816     1  Green 3.951616e-06  0.0683280435  0.0089871383
## 30 880.7461     1  Green 3.919137e-06  0.0680624844  0.0031416892
## 31 518.3245     1  Green 3.877335e-06  0.2166695330  0.0113981441
## 32 540.3070     1    Red 3.842269e-06 -0.0088936740  0.0185064785
## 33 786.6012     1  Green 3.758817e-06  0.0378856603  0.1415546889
## 34 681.4735     1    Red 3.656944e-06 -0.0255833764  0.0042586558
## 35 496.3401     1  Green 3.654763e-06 -0.1561668761  0.3429318529
## 36 643.5176     1    Red 3.632472e-06 -0.0262226636  0.0135348796
## 37 494.3231     1  Green 3.620405e-06 -0.0035808530 -0.0030723304
## 38 730.5381     1  Green 3.587749e-06  0.0518890800  0.0342315039
## 39 532.2820     1  Green 3.536513e-06  0.0901480476  0.0416899552
## 40 682.0926     1    Red 3.530699e-06  0.0008352384 -0.1078180541
## 41 782.5685     1    Red 3.465576e-06 -0.0378687851  0.0248867929
## 42 519.4972     1    Red 3.455637e-06  0.0000842909  0.0344742169
## 43 825.6950     1    Red 3.453216e-06  0.0168740383  0.0112644377
## 44 840.6551     1  Green 3.423464e-06  0.0502866604 -0.0015783416
## 45 517.3095     1    Red 3.371007e-06 -0.0513020130  0.0285089236
## 46 569.4286     1  Green 3.283459e-06  0.0025584284  0.0472000315
## 47 860.6926     1  Green 3.279644e-06  0.0042286963  0.0037809022
## 48 413.2668     1    Red 3.275262e-06 -0.0127646070  0.0042649401
## 49 808.5815     1    Red 3.177078e-06 -0.0321089072  0.0281567874
## 50 621.4612     1  Green 3.155697e-06  0.0487938393  0.0188274489
##        weights     Oweights
## 1  -0.35743629 -0.137122651
## 2  -0.29566558  0.114857673
## 3   0.24524081 -0.024751695
## 4   0.22145197  0.332895841
## 5  -0.19435400  0.125030726
## 6  -0.17959920 -0.176658195
## 7   0.17077125  0.004317331
## 8  -0.15892355 -0.113332436
## 9   0.14239180  0.083729505
## 10 -0.13336725  0.081603049
## 11  0.12733667 -0.039602658
## 12  0.12190804 -0.020900751
## 13  0.12142755 -0.069808327
## 14 -0.12097674  0.064855497
## 15 -0.11230886 -0.018573372
## 16 -0.10495890  0.051522246
## 17  0.10461509 -0.059559595
## 18  0.10045485  0.006387471
## 19 -0.09437420 -0.072195760
## 20  0.09354135 -0.050037109
## 21  0.09334262  0.198312219
## 22  0.08904221  0.041345339
## 23 -0.08819104 -0.189122719
## 24 -0.08666160  0.075099280
## 25  0.08610517 -0.058387051
## 26 -0.08534091 -0.071985552
## 27 -0.08367436  0.306073252
## 28 -0.08011002  0.073075423
## 29  0.07440040 -0.015601053
## 30  0.07378890 -0.021115563
## 31  0.07300186  0.052396601
## 32 -0.07234164  0.067460071
## 33  0.07077043  0.106367134
## 34 -0.06885237  0.043400179
## 35  0.06881131  0.224713770
## 36 -0.06839162  0.052061708
## 37  0.06816441 -0.054405191
## 38  0.06754957  0.007490820
## 39  0.06658491  0.032305834
## 40 -0.06647544 -0.058913521
## 41 -0.06524931  0.056050125
## 42 -0.06506219  0.082019935
## 43 -0.06501661  0.066084585
## 44  0.06445644 -0.026757847
## 45 -0.06346879  0.052525304
## 46  0.06182045  0.003171703
## 47  0.06174862 -0.039467991
## 48 -0.06166611  0.043738342
## 49 -0.05981752  0.057860749
## 50  0.05941496 -0.003320468
# Image the fitted OPLS model for the selected number of components.
image(moxsox.opls, model = list(ncomp = comp))

# Look the fitted model up by the component count stored in `comp` rather
# than a hard-coded `ncomp = 1` name, so the extraction stays in sync with
# the model fitted above if `comp` is changed.
scores <- moxsox.opls@resultData[[paste0("ncomp = ", comp)]]
# t0 holds the Oscores entry and t1 the scores entry of the fitted model.
opls.scores <- data.frame(t0 = scores$Oscores, t1 = scores$scores, Class = scores$classes)
# The score plot is left disabled for this model.
#ggplot(opls.scores, aes(x=C1, y=t0.C1, color=Class)) + geom_point() + xlab('t[1]') + ylab('t[0]')

Also not a predictive model – no t-orthogonal scores

One other possibility that would be good to consider is that the spatial variation may alter the structure of the dataset

Split Dataset by spatial coordinates

Image the complete maldifishmz dataset

# Image the `skma` result (defined earlier in the script) in a 1 x 1 layout
# with the key shown. TRUE is spelled out because T is an ordinary,
# reassignable variable.
image(skma, key = TRUE, layout = c(1, 1))

Take region between x=100 and 200

# Keep the pixels whose x coordinate lies strictly between 100 and 200.
p <- pData(maldifishmz)[(pData(maldifishmz)$x > 100 & pData(maldifishmz)$x < 200), ]
msi <- maldifishmz[, rownames(pData(maldifishmz)) %in% rownames(p)]

## Assign cross validation groups by cutting the y-coordinate range into
## 10 equal-width bins (integer codes 1..10), so folds are spatial strips
msi$CV <- cut(msi$y, breaks = 10, labels = FALSE)

## Visualize two of the cuts
msi1 <- msi[, msi$CV %in% c(1, 2)]
pca.msi1 <- PCA(msi1, ncomp = 1:2)
# TRUE is spelled out because T is an ordinary, reassignable variable.
image(pca.msi1, superpose = TRUE)

Run discriminant analysis between host and mox signals

# Host vs. mox within the spatial strip: keep only the Red and Tissue pixels.
hostmox <- msi[, msi$Class %in% c("Red", "Tissue")]
table(hostmox$Class)
## 
##    Red Tissue 
##   1639   6728
# Build an "x-y" coordinate key for every pixel so that the rows sampled below
# can be matched back to the original pixel names.
ddd <- data.frame(pData(hostmox))
ddd$xy <- paste(ddd$x, ddd$y, sep = "-")

# Balance the classes: draw 400 pixels per Class without replacement.
# FALSE is spelled out because F is an ordinary, reassignable variable.
subset <- data.frame(
  pData(hostmox) %>%
    group_by(Class) %>%
    sample_n(size = 400, replace = FALSE)
)
subset$xy <- paste(subset$x, subset$y, sep = "-")

# Pixel names whose coordinates were sampled.
dnew <- ddd[ddd$xy %in% subset$xy, ]
coordsSubset <- rownames(dnew)

hostmox.subset <- hostmox[, rownames(pData(hostmox)) %in% coordsSubset]
table(hostmox.subset$Class)
## 
##    Red Tissue 
##    400    400
# Cross-validate OPLS models with 1 to 12 components, using the `CV` pixel
# variable as the fold assignment.
hostmox.cv <- cvApply(
  hostmox.subset,
  .y = as.factor(hostmox.subset$Class),
  .fun = "OPLS",
  ncomp = 1:12,
  keep.Xnew = FALSE,
  .fold = CV
)
plot(summary(hostmox.cv))

# Fit a 9-component OPLS model on the balanced host-vs-mox subset (component
# count presumably picked from the cross-validation plot) and report its
# per-class performance.
comp <- 9
opls <- OPLS(x = hostmox.subset, y = as.factor(hostmox.subset$Class), ncomp = comp)
summary(opls)
## $`ncomp = 9`
##                   Red    Tissue
## Accuracy    0.6450000 0.6450000
## Sensitivity 0.5775000 0.7125000
## Specificity 0.7125000 0.5775000
## FDR         0.3323699 0.3722467
image(opls, model = list(ncomp = comp))

# Look the fitted model up by the component count stored in `comp` rather
# than a hard-coded `ncomp = 9` name, so changing `comp` above cannot leave
# this extraction pointing at a model that was never fitted.
scores <- opls@resultData[[paste0("ncomp = ", comp)]]
# t0 holds the Oscores entry and t1 the scores entry of the fitted model.
opls.scores <- data.frame(t0 = scores$Oscores, t1 = scores$scores, Class = scores$classes)
ggplot(opls.scores, aes(x = C1, y = t0.C1, color = Class)) +
  geom_point() +
  xlab("t[1]") +
  ylab("t[0]")

But the FDR is too high; not sure how to overcome this, despite being able to build a model

host vs sox

# Host vs. sox within the spatial strip: keep only the Green and Tissue pixels.
hostsox <- msi[, msi$Class %in% c("Green", "Tissue")]
table(hostsox$Class)
## 
##  Green Tissue 
##    410   6728
# Build an "x-y" coordinate key for every pixel so that the rows sampled below
# can be matched back to the original pixel names.
ddd <- data.frame(pData(hostsox))
ddd$xy <- paste(ddd$x, ddd$y, sep = "-")

# Balance the classes: draw 400 pixels per Class without replacement.
# FALSE is spelled out because F is an ordinary, reassignable variable.
subset <- data.frame(
  pData(hostsox) %>%
    group_by(Class) %>%
    sample_n(size = 400, replace = FALSE)
)
subset$xy <- paste(subset$x, subset$y, sep = "-")

# Pixel names whose coordinates were sampled.
dnew <- ddd[ddd$xy %in% subset$xy, ]
coordsSubset <- rownames(dnew)

hostsox.subset <- hostsox[, rownames(pData(hostsox)) %in% coordsSubset]
table(hostsox.subset$Class)
## 
##  Green Tissue 
##    400    400
# Cross-validate OPLS models with 1 to 12 components, using the `CV` pixel
# variable as the fold assignment.
hostsox.cv <- cvApply(
  hostsox.subset,
  .y = as.factor(hostsox.subset$Class),
  .fun = "OPLS",
  ncomp = 1:12,
  keep.Xnew = FALSE,
  .fold = CV
)
plot(summary(hostsox.cv))

# Fit a 1-component OPLS model on the balanced host-vs-sox subset (component
# count presumably picked from the cross-validation plot) and report its
# per-class performance.
comp <- 1
opls <- OPLS(x = hostsox.subset, y = as.factor(hostsox.subset$Class), ncomp = comp)
summary(opls)
## $`ncomp = 1`
##                 Green    Tissue
## Accuracy    0.6637500 0.6637500
## Sensitivity 0.6550000 0.6725000
## Specificity 0.6725000 0.6550000
## FDR         0.3333333 0.3390663
image(opls, model = list(ncomp = comp))

# Look the fitted model up by the component count stored in `comp` rather
# than a hard-coded `ncomp = 1` name, so changing `comp` above cannot leave
# this extraction pointing at a model that was never fitted.
scores <- opls@resultData[[paste0("ncomp = ", comp)]]
# t0 holds the Oscores entry and t1 the scores entry of the fitted model.
opls.scores <- data.frame(t0 = scores$Oscores, t1 = scores$scores, Class = scores$classes)
# The score plot is left disabled for this model.
#ggplot(opls.scores, aes(x=C1, y=t0.C1, color=Class)) + geom_point() + xlab('t[1]') + ylab('t[0]')

Sox vs. Mox

# Sox vs. mox within the spatial strip: keep only the Green and Red pixels.
moxsox <- msi[, msi$Class %in% c("Green", "Red")]
table(moxsox$Class)
## 
## Green   Red 
##   410  1639
# Build an "x-y" coordinate key for every pixel so that the rows sampled below
# can be matched back to the original pixel names.
ddd <- data.frame(pData(moxsox))
ddd$xy <- paste(ddd$x, ddd$y, sep = "-")

# Balance the classes: draw 400 pixels per Class without replacement.
# FALSE is spelled out because F is an ordinary, reassignable variable.
subset <- data.frame(
  pData(moxsox) %>%
    group_by(Class) %>%
    sample_n(size = 400, replace = FALSE)
)
subset$xy <- paste(subset$x, subset$y, sep = "-")

# Pixel names whose coordinates were sampled.
dnew <- ddd[ddd$xy %in% subset$xy, ]
coordsSubset <- rownames(dnew)

moxsox.subset <- moxsox[, rownames(pData(moxsox)) %in% coordsSubset]
table(moxsox.subset$Class)
## 
## Green   Red 
##   400   400
# Cross-validate OPLS models with 1 to 12 components, using the `CV` pixel
# variable as the fold assignment.
moxsox.cv <- cvApply(
  moxsox.subset,
  .y = as.factor(moxsox.subset$Class),
  .fun = "OPLS",
  ncomp = 1:12,
  keep.Xnew = FALSE,
  .fold = CV
)
plot(summary(moxsox.cv))

# Fit a 1-component OPLS model on the balanced sox-vs-mox subset (component
# count presumably picked from the cross-validation plot) and report its
# per-class performance.
comp <- 1
opls <- OPLS(x = moxsox.subset, y = as.factor(moxsox.subset$Class), ncomp = comp)
summary(opls)
## $`ncomp = 1`
##              Green    Red
## Accuracy    0.6525 0.6525
## Sensitivity 0.6525 0.6525
## Specificity 0.6525 0.6525
## FDR         0.3475 0.3475
image(opls, model = list(ncomp = comp))

# Look the fitted model up by the component count stored in `comp` rather
# than a hard-coded `ncomp = 1` name, so changing `comp` above cannot leave
# this extraction pointing at a model that was never fitted.
scores <- opls@resultData[[paste0("ncomp = ", comp)]]
# t0 holds the Oscores entry and t1 the scores entry of the fitted model.
opls.scores <- data.frame(t0 = scores$Oscores, t1 = scores$scores, Class = scores$classes)
# The score plot is left disabled for this model.
#ggplot(opls.scores, aes(x=C1, y=t0.C1, color=Class)) + geom_point() + xlab('t[1]') + ylab('t[0]')

Ion Distribution Plots

# Ion image of the full dataset at m/z 869.537 with a +/- 0.5 window.
image(
  maldifishmz,
  mz = 869.537,
  plusminus = 0.5
)